package com.larry.spark.rdd.transform

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Small Spark driver that reads `data/apache.log`, keeps the lines containing
 * the text `17/05/2015`, and prints the space-delimited field at index 6 of
 * each remaining line to standard output.
 *
 * NOTE(review): field 6 is presumably the request path of an access-log entry;
 * confirm against the actual layout of `data/apache.log`.
 */
object RDD_Oper_Filter_1 {

  /**
   * Entry point: runs the filter/extract pipeline on a local Spark context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // TODO: use Spark operators

    val conf = new SparkConf().setMaster("local[*]").setAppName("rdd")
    val sc = new SparkContext(conf)

    // Kept as a local val so the closures below capture a plain Int.
    val fieldIndex = 6

    // Stop the context even if reading the file or running the job fails.
    try {
      val rdd = sc.textFile("data/apache.log")

      // Filter: substring match against the whole line, not just a date column.
      val rdd2 = rdd.filter(_.contains("17/05/2015"))

      // Skip lines with too few fields instead of failing the whole job with
      // an ArrayIndexOutOfBoundsException on a malformed or truncated entry.
      val rdd3 = rdd2
        .map(_.split(" "))
        .filter(_.length > fieldIndex)
        .map(fields => fields(fieldIndex))

      rdd3.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}
